library(haven)
library(tidyverse)
library(hrbrthemes)
library(infer)

# Years iterated over by the district, gender, age and education simulations.
election_years <- c(
  1983, 1987, 1991, 1995, 1999, 2003,
  2007, 2009, 2013, 2016, 2017, 2021
)

#### PARTY CHOICE

# Sample: number of respondents per year with fresh == 1 and a non-zero,
# non-missing party code. These counts are the per-year sample sizes drawn
# in the simulation below.
sample <- read_dta("temp/sampledata.dta")
sample <- sample %>%
  filter(fresh == 1 & party != 0 & !is.na(party)) %>%
  group_by(year) %>%
  summarise(n = n())

# Population: party vote counts per year, excluding party code 0.
p <- read_dta("popdata/targets/party_1983-2021.dta")
p <- p %>% filter(party != 0)

# Simulate 10000 random samples per year (drawn without replacement from a
# population with one row per vote) and compute each party's sampled share.
# Per-year results are collected in a list and bound once at the end instead
# of growing `res` with rbind() on every iteration.
res_list <- list()
for (i in sample$year) {
  print(i)
  pm <- p %>% filter(year == i)
  temp <- data.frame(party = rep(pm$party, pm$n)) %>%
    rep_sample_n(size = sample$n[sample$year == i], reps = 10000) %>%
    ungroup() %>%
    count(replicate, party) %>%
    # A party that receives no draws in a replicate has no row after
    # counting. Add it back with n = 0 so that its error (0 - election share)
    # enters the error summaries instead of silently dropping out.
    complete(replicate, party, fill = list(n = 0L)) %>%
    group_by(replicate) %>%
    mutate(year = i, n_total = sum(n), share = n / n_total)
  res_list[[as.character(i)]] <- temp
}
res <- bind_rows(res_list)

# Population share of each party within its year.
p <- p %>%
  group_by(year) %>%
  mutate(total = sum(n), election = n / total) %>%
  select(year, party, election)

# Attach the population share to every simulated party share.
res2 <- res %>% left_join(p, by = c("year", "party"))

# Overall error. Within each year and replicate, average the absolute
# difference between simulated and population shares over parties (party
# code 89 is left out), scaled by 100. Then describe the distribution of
# that average across the replicates of each year.
res3 <- res2 %>%
  filter(party != 89) %>%
  group_by(year, replicate) %>%
  summarise(mae_avg = 100 * mean(abs(share - election))) %>%
  group_by(year) %>%
  summarise(
    min = min(mae_avg),
    q025 = quantile(mae_avg, probs = 0.025),
    mae_median = median(mae_avg),
    q975 = quantile(mae_avg, probs = 0.975),
    max = max(mae_avg),
    mae_mean = mean(mae_avg)
  )

# Written with saveRDS(), so it must be read back with readRDS() even though
# the file name ends in ".Rdata".
saveRDS(res3, "temp/mae_simulation.Rdata")

# Party level: distribution of the signed error (simulated share minus
# population share) across replicates, for every year and party.
# Note that `mae_median` here is the median of the signed error.
res4 <- res2 %>%
  group_by(year, party) %>%
  mutate(error = share - election) %>%
  summarise(
    min = min(error),
    q025 = quantile(error, probs = 0.025),
    mae_median = median(error),
    q975 = quantile(error, probs = 0.975),
    max = max(error),
    error_mean = mean(error)
  )

# Written with saveRDS(), so it must be read back with readRDS() even though
# the file name ends in ".Rdata".
saveRDS(res4, "temp/mae_simulation_partylevel.Rdata")


### DISTRICT

# Sample: number of respondents with fresh == 1 per year. These counts are
# the per-year sample sizes drawn in the simulation below.
sample <- read_dta("temp/sampledata.dta")
sample <- sample %>%
  filter(fresh == 1) %>%
  group_by(year) %>%
  summarise(n = n())

# Population: counts per year, collapsed to a `capital` flag that is 1 for
# districts 3, 4, 6, 11 and 12 and 0 for every other district.
p <- read_dta("popdata/targets/district_1983-2021.dta")
p <- p %>%
  mutate(capital = ifelse(district %in% c(3, 4, 6, 11, 12), 1, 0)) %>%
  group_by(year, capital) %>%
  summarize(n = sum(n))

# Simulate 10000 random samples per year and compute the sampled share of
# each `capital` category. Per-year results are collected in a list and bound
# once at the end instead of growing `res` with rbind() on every iteration.
res_list <- list()
for (i in election_years) {
  print(i)
  # The loop runs over election_years, but the sample size comes from
  # `sample`; fail with a clear message if that year has no (unique) size.
  n_draw <- sample$n[which(sample$year == i)]
  if (length(n_draw) != 1L) {
    stop("Expected exactly one sample size for year ", i, call. = FALSE)
  }
  pm <- p %>% filter(year == i)
  temp <- data.frame(capital = rep(pm$capital, pm$n)) %>%
    rep_sample_n(size = n_draw, reps = 10000) %>%
    ungroup() %>%
    count(replicate, capital) %>%
    # Keep categories that received no draws in a replicate as n = 0 rather
    # than dropping them.
    complete(replicate, capital, fill = list(n = 0L)) %>%
    group_by(replicate) %>%
    mutate(year = i, n_total = sum(n), share = n / n_total)
  res_list[[as.character(i)]] <- temp
}
res <- bind_rows(res_list)

# Population share of each category within its year.
p <- p %>%
  group_by(year) %>%
  mutate(total = sum(n), election = n / total) %>%
  select(year, capital, election)

res2 <- res %>% left_join(p, by = c("year", "capital"))

# Distribution of the simulated share across replicates, per year. The
# columns named mae_median / mae_mean hold the median / mean of `share`.
# NOTE(review): this keeps capital == 0, i.e. the districts NOT flagged as
# capital above, although the object and output file are named "capital" --
# confirm which group is intended.
res2_capital <- res2 %>%
  filter(capital == 0) %>%
  group_by(year) %>%
  summarise(
    min = min(share),
    q025 = quantile(share, 0.025),
    mae_median = median(share),
    q975 = quantile(share, 0.975),
    max = max(share),
    mae_mean = mean(share)
  )

# Written with saveRDS(), so it must be read back with readRDS() even though
# the file name ends in ".Rdata".
saveRDS(res2_capital, "temp/mae_simulation_capital.Rdata")

### GENDER

# Sample: number of respondents with fresh == 1 per year. These counts are
# the per-year sample sizes drawn in the simulation below.
sample <- read_dta("temp/sampledata.dta")
sample <- sample %>%
  filter(fresh == 1) %>%
  group_by(year) %>%
  summarise(n = n())

# Population: counts per year and value of `male`.
p <- read_dta("popdata/targets/male_1983-2021.dta")
p <- p %>%
  group_by(year, male) %>%
  summarize(n = sum(n))

# Simulate 10000 random samples per year and compute the sampled share of
# each `male` category. Per-year results are collected in a list and bound
# once at the end instead of growing `res` with rbind() on every iteration.
res_list <- list()
for (i in election_years) {
  print(i)
  # The loop runs over election_years, but the sample size comes from
  # `sample`; fail with a clear message if that year has no (unique) size.
  n_draw <- sample$n[which(sample$year == i)]
  if (length(n_draw) != 1L) {
    stop("Expected exactly one sample size for year ", i, call. = FALSE)
  }
  pm <- p %>% filter(year == i)
  temp <- data.frame(male = rep(pm$male, pm$n)) %>%
    rep_sample_n(size = n_draw, reps = 10000) %>%
    ungroup() %>%
    count(replicate, male) %>%
    # Keep categories that received no draws in a replicate as n = 0 rather
    # than dropping them.
    complete(replicate, male, fill = list(n = 0L)) %>%
    group_by(replicate) %>%
    mutate(year = i, n_total = sum(n), share = n / n_total)
  res_list[[as.character(i)]] <- temp
}
res <- bind_rows(res_list)

# Population share of each category within its year.
p <- p %>%
  group_by(year) %>%
  mutate(total = sum(n), election = n / total) %>%
  select(year, male, election)

res2 <- res %>% left_join(p, by = c("year", "male"))

# Distribution of the simulated share of male == 0 across replicates, per
# year. The columns named mae_median / mae_mean hold the median / mean of
# `share`.
res2_female <- res2 %>%
  filter(male == 0) %>%
  group_by(year) %>%
  summarise(
    min = min(share),
    q025 = quantile(share, 0.025),
    mae_median = median(share),
    q975 = quantile(share, 0.975),
    max = max(share),
    mae_mean = mean(share)
  )

# Written with saveRDS(), so it must be read back with readRDS() even though
# the file name ends in ".Rdata".
saveRDS(res2_female, "temp/mae_simulation_female.Rdata")

### AGE 18-29

# Sample: number of respondents with fresh == 1 per year. These counts are
# the per-year sample sizes drawn in the simulation below.
sample <- read_dta("temp/sampledata.dta")
sample <- sample %>%
  filter(fresh == 1) %>%
  group_by(year) %>%
  summarise(n = n())

# Population: counts per year and age category.
p <- read_dta("popdata/targets/agecat_1983-2021.dta")
p <- p %>%
  group_by(year, agecat) %>%
  summarize(n = sum(n))

# Simulate 10000 random samples per year and compute the sampled share of
# each age category. Per-year results are collected in a list and bound once
# at the end instead of growing `res` with rbind() on every iteration.
res_list <- list()
for (i in election_years) {
  print(i)
  # The loop runs over election_years, but the sample size comes from
  # `sample`; fail with a clear message if that year has no (unique) size.
  n_draw <- sample$n[which(sample$year == i)]
  if (length(n_draw) != 1L) {
    stop("Expected exactly one sample size for year ", i, call. = FALSE)
  }
  pm <- p %>% filter(year == i)
  temp <- data.frame(agecat = rep(pm$agecat, pm$n)) %>%
    rep_sample_n(size = n_draw, reps = 10000) %>%
    ungroup() %>%
    count(replicate, agecat) %>%
    # Keep categories that received no draws in a replicate as n = 0 rather
    # than dropping them.
    complete(replicate, agecat, fill = list(n = 0L)) %>%
    group_by(replicate) %>%
    mutate(year = i, n_total = sum(n), share = n / n_total)
  res_list[[as.character(i)]] <- temp
}
res <- bind_rows(res_list)

# Population share of each category within its year.
p <- p %>%
  group_by(year) %>%
  mutate(total = sum(n), election = n / total) %>%
  select(year, agecat, election)

res2 <- res %>% left_join(p, by = c("year", "agecat"))

# Distribution of the simulated share of agecat == 1 across replicates, per
# year (presumably the 18-29 group named in the section header -- confirm
# against the coding of `agecat`). The columns named mae_median / mae_mean
# hold the median / mean of `share`.
res2_agecat <- res2 %>%
  filter(agecat == 1) %>%
  group_by(year) %>%
  summarise(
    min = min(share),
    q025 = quantile(share, 0.025),
    mae_median = median(share),
    q975 = quantile(share, 0.975),
    max = max(share),
    mae_mean = mean(share)
  )

# Written with saveRDS(), so it must be read back with readRDS() even though
# the file name ends in ".Rdata".
saveRDS(res2_agecat, "temp/mae_simulation_agecat.Rdata")

##  EDUCATION university

# Sample: number of respondents with fresh == 1 per year. These counts are
# the per-year sample sizes drawn in the simulation below.
sample <- read_dta("temp/sampledata.dta")
sample <- sample %>%
  filter(fresh == 1) %>%
  group_by(year) %>%
  summarise(n = n())

# Population: counts per year and education category.
p <- read_dta("popdata/targets/educ_1983-2021.dta")
p <- p %>%
  group_by(year, educ) %>%
  summarize(n = sum(n))

# Simulate 10000 random samples per year and compute the sampled share of
# each education category. Per-year results are collected in a list and bound
# once at the end instead of growing `res` with rbind() on every iteration.
res_list <- list()
for (i in election_years) {
  print(i)
  # The loop runs over election_years, but the sample size comes from
  # `sample`; fail with a clear message if that year has no (unique) size.
  n_draw <- sample$n[which(sample$year == i)]
  if (length(n_draw) != 1L) {
    stop("Expected exactly one sample size for year ", i, call. = FALSE)
  }
  pm <- p %>% filter(year == i)
  temp <- data.frame(educ = rep(pm$educ, pm$n)) %>%
    rep_sample_n(size = n_draw, reps = 10000) %>%
    ungroup() %>%
    count(replicate, educ) %>%
    # Keep categories that received no draws in a replicate as n = 0 rather
    # than dropping them.
    complete(replicate, educ, fill = list(n = 0L)) %>%
    group_by(replicate) %>%
    mutate(year = i, n_total = sum(n), share = n / n_total)
  res_list[[as.character(i)]] <- temp
}
res <- bind_rows(res_list)

# Population share of each category within its year.
p <- p %>%
  group_by(year) %>%
  mutate(total = sum(n), election = n / total) %>%
  select(year, educ, election)

res2 <- res %>% left_join(p, by = c("year", "educ"))

# Distribution of the simulated share of educ == 3 across replicates, per
# year (presumably the university category named in the section header --
# confirm against the coding of `educ`). The columns named mae_median /
# mae_mean hold the median / mean of `share`.
res2_uni <- res2 %>%
  filter(educ == 3) %>%
  group_by(year) %>%
  summarise(
    min = min(share),
    q025 = quantile(share, 0.025),
    mae_median = median(share),
    q975 = quantile(share, 0.975),
    max = max(share),
    mae_mean = mean(share)
  )

# Written with saveRDS(), so it must be read back with readRDS() even though
# the file name ends in ".Rdata".
saveRDS(res2_uni, "temp/mae_simulation_uni.Rdata")


## EXTREME EXAMPLES OF MAE FOR PARTY SUPPORT

# Two parties of 75000 votes each. Draw 10000 samples of 1500 and compute
# each party's sampled share next to its population share of 0.5.
temp <- data.frame(party = rep(c("A", "B"), each = 75000)) %>%
  rep_sample_n(size = 1500, reps = 10000) %>%
  group_by(replicate, party) %>%
  summarize(n = n()) %>%
  group_by(replicate) %>%
  mutate(
    n_total = sum(n),
    share = n / n_total,
    election = 0.5
  )

# Mean absolute error per replicate (scaled by 100), then its distribution
# across all replicates.
temp2 <- temp %>%
  group_by(replicate) %>%
  summarise(mae_avg = 100 * mean(abs(share - election))) %>%
  summarise(
    min = min(mae_avg),
    q025 = quantile(mae_avg, probs = 0.025),
    mae_median = median(mae_avg),
    q975 = quantile(mae_avg, probs = 0.975),
    max = max(mae_avg),
    mae_mean = mean(mae_avg)
  )

# Expected MAE 1.03

# Twenty parties of 7500 votes each. Draw 10000 samples of 1500 and compute
# each party's sampled share next to its population share of 0.05.
temp <- data.frame(party = rep(letters[1:20], each = 7500)) %>%
  rep_sample_n(size = 1500, reps = 10000) %>%
  group_by(replicate, party) %>%
  summarize(n = n()) %>%
  group_by(replicate) %>%
  mutate(
    n_total = sum(n),
    share = n / n_total,
    election = 0.05
  )

# Mean absolute error per replicate (scaled by 100), then its distribution
# across all replicates.
temp2 <- temp %>%
  group_by(replicate) %>%
  summarise(mae_avg = 100 * mean(abs(share - election))) %>%
  summarise(
    min = min(mae_avg),
    q025 = quantile(mae_avg, probs = 0.025),
    mae_median = median(mae_avg),
    q975 = quantile(mae_avg, probs = 0.975),
    max = max(mae_avg),
    mae_mean = mean(mae_avg)
  )
temp2

# Expected MAE 0.444
